# This script reproduces Figure 17 and Table 12 #

rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")
source("functions.R")
# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

load("trans_data.RData")

d_sub1 <- d_sub1[,c("city_county", "county_name", "county_id", 
            "city_id", "month", "city_name", "log_transaction_area", 
            "log_transaction_area_l1", "tour", "tour_l1", "all_tour", "all_tour_l1",
            "log_gdp_l1", "log_gdppc_l1", "log_gdp", "log_gdppc",
            "city_tour", "log_number_total", 
            "log_expd", "log_industrial",
            "log_hos", "log_fia",
            "log_loan", "log_middle_school",
            "log_rev")]

d_sub1$year <- as.numeric(substrRight(as.character(d_sub1$month), 4))

d_sub1$transaction_area <- exp(d_sub1$log_transaction_area)

d_sub1 <- merge(d_sub1, trans_data, by = c("city_name", "year"), all.x = T)

year_d <- d_sub1 %>% group_by(county_id, year) %>% 
  dplyr::summarize(transaction_area = mean(transaction_area, na.rm = TRUE),
                   tour_extent = sum(tour, na.rm = TRUE), 
                   log_gdp = mean(log_gdp,na.rm = T),
                   log_gdppc = mean(log_gdppc,na.rm = T),
                   log_fia = mean(log_fia, na.rm = T),
                   log_loan = mean(log_loan, na.rm = T),
                   log_hos = mean(log_hos, na.rm = T),
                   log_industrial = mean(log_industrial, na.rm = T),
                   log_rev = mean(log_rev, na.rm = T),
                   log_expd = mean(log_expd, na.rm = T),
                   log_middle_school = mean(log_middle_school, na.rm = T),
                   Trans = mean(Trans, na.rm = T))

year_d$log_transaction_area <- log(year_d$transaction_area + .01)
year_d <- as.data.frame(year_d)


var_names <- c("log_gdp", "log_gdppc", "log_fia", 
               "log_loan", "log_hos", "log_industrial", 
               "log_rev", "log_expd", "log_middle_school")

for (i in 1:length(var_names)) {
  year_d <- missing_indicator(var_names[i],
                              year_d)
}
year_d$tour_extent_sd <- standardize(year_d$tour_extent)

year_d$Trans[which(year_d$Trans == "NaN")] <- NA

tour_Trans_con <- plm(formula = lead(tour_extent) ~ 
                        Trans + log_gdp + log_gdp_missing + 
                        log_gdppc + log_gdppc_missing + 
                        log_fia + log_fia_missing + 
                        log_loan + log_loan_missing + 
                        log_hos + log_hos_missing +
                        log_industrial + log_industrial_missing + 
                        log_rev + log_rev_missing + 
                        log_expd + log_expd_missing + 
                        log_middle_school + log_middle_school_missing,
                      index = c("county_id", "year"),
                      effect = "twoways",
                      data = year_d)
vcov_tour_Trans_con <- clubSandwich::vcovCR(tour_Trans_con, type="CR1S", cluster = year_d$county_id)
se_Trans_con <- coeftest(tour_Trans_con, vcov_tour_Trans_con)
reg_results_con_land <- list("model" = tour_Trans_con, 
                             "se" = se_Trans_con)

tour_Trans_nocon <- plm(formula = lead(tour_extent, 1) ~ 
                          Trans,
                        index = c("county_id", "year"),
                        effect = "twoways",
                        data = year_d)
vcov_tour_Trans_nocon <- clubSandwich::vcovCR(tour_Trans_nocon, type="CR1S", cluster = year_d$county_id)
se_Trans_nocon <- coeftest(tour_Trans_nocon, vcov_tour_Trans_nocon)
reg_results_land <- list("model" = tour_Trans_nocon, 
                         "se" = se_Trans_nocon)


lower_Trans_nocon_Trans_95 <- se_Trans_nocon["Trans", "Estimate"] - qnorm(.975) * se_Trans_nocon["Trans", "Std. Error"]
upper_Trans_nocon_Trans_95 <- se_Trans_nocon["Trans", "Estimate"] + qnorm(.975) * se_Trans_nocon["Trans", "Std. Error"]

lower_Trans_nocon_Trans_90 <- se_Trans_nocon["Trans", "Estimate"] - qnorm(.95) * se_Trans_nocon["Trans", "Std. Error"]
upper_Trans_nocon_Trans_90 <- se_Trans_nocon["Trans", "Estimate"] + qnorm(.95) * se_Trans_nocon["Trans", "Std. Error"]

est_Trans <- se_Trans_nocon["Trans", "Estimate"]

lower_Trans_con_Trans_95 <- se_Trans_con["Trans", "Estimate"] - qnorm(.975) * se_Trans_con["Trans", "Std. Error"]
upper_Trans_con_Trans_95 <- se_Trans_con["Trans", "Estimate"] + qnorm(.975) * se_Trans_con["Trans", "Std. Error"]

lower_Trans_con_Trans_90 <- se_Trans_con["Trans", "Estimate"] - qnorm(.95) * se_Trans_con["Trans", "Std. Error"]
upper_Trans_con_Trans_90 <- se_Trans_con["Trans", "Estimate"] + qnorm(.95) * se_Trans_con["Trans", "Std. Error"]

est_Trans_con <- se_Trans_con["Trans", "Estimate"]

pdf("output/Figure17.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.001, .001),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Correlation between Government Transparency \n and No. Months under Inspection Next Year")
lines(c(1,1), c(lower_Trans_nocon_Trans_95, upper_Trans_nocon_Trans_95))
lines(c(1,1), c(lower_Trans_nocon_Trans_90, upper_Trans_nocon_Trans_90), 
      lwd = 5, col = "grey")
points(1, est_Trans, pch = 19)
text(1, -.0005, "Without controls")

lines(c(1.1,1.1), c(lower_Trans_con_Trans_95, upper_Trans_con_Trans_95))
lines(c(1.1,1.1), c(lower_Trans_con_Trans_90, upper_Trans_con_Trans_90), 
      lwd = 5, col = "grey")
points(1.1, est_Trans_con, pch = 19)
text(1.1, -.0005, "With controls")

abline(h = 0, lty = 3)
dev.off()


### Making a table ###
sink("output/Table12.tex")
stargazer(reg_results_land$model,
          reg_results_con_land$model,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Number of Months under Inspection Next Year",
                             "Number of Months under Inspection Next Year"),
          column.labels = c("Number of Months under Inspection Next Year",
                            "Number of Months under Inspection Next Year"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("county_id", "year",
                   "log_gdp_missing", 
                   "log_gdppc_missing", 
                   "log_fia_missing", 
                   "log_loan_missing", 
                   "log_hos_missing",
                   "log_industrial_missing", 
                   "log_rev_missing", 
                   "log_expd_missing", 
                   "log_middle_school_missing"
                   
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("County FE?", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes"),
                           c("Controls?", "No", "Yes")
          ),
          covariate.labels = c("Inspection",
                               "logged GDP per capita last year", 
                               "logged total GDP last year", 
                               "logged fixed asset investment last year",
                               "logged volume of loans last year", 
                               "logged number of hospitals last year", 
                               "logged amount of industrial output last year", 
                               "logged revenue last year", 
                               "logged expenditure last year", 
                               "logged number of secondary schools last year"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(reg_results_land$se[,2], 
                    reg_results_con_land$se[,2]),
          p = list(reg_results_land$se[,4], 
                   reg_results_con_land$se[,4]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()


